* Project      :  The Full, Persistent, and Symmetric Pass-Through of a Temporary VAT Cut
* Authors      :  Márcia Silva-Pereira, João Quelhas, Tiago Bernardino, Ricardo Duque Gabriel
* Date         :  01/04/2025
* Description  :  Dataset descriptives
* Dependencies :
* Modifications: (add date, author and change)

*********************************************************
*               Dataset Descriptives                    *
*********************************************************

* FIGURE 1: Food Prices during the Temporary VAT Cut in Portugal
*********************************************************

// Load the cleaned dataset.
use "${path_work}/auxiliar/imputation_0_only_food.dta", clear

// Compute a daily average index for each day for the Zero VAT goods.
preserve
keep if treatment == 1
sort date
collapse (mean) mean_treated=index_23032023, by(date)
save "${path_work}/auxiliar/treated_index_mean.dta", replace
restore

preserve
keep if treatment == 1
sort date
collapse (semean) se_treated=index_23032023, by(date)
save "${path_work}/auxiliar/treated_index_se_mean.dta", replace
restore

// Compute a daily average index for each day for the non-Zero VAT goods.
preserve
keep if treatment == 0
sort date
collapse (mean) mean_control=index_23032023, by(date)
save "${path_work}/auxiliar/control_index_mean.dta", replace
restore

preserve
keep if treatment == 0
sort date
collapse (semean) se_control=index_23032023, by(date)
save "${path_work}/auxiliar/control_index_se_mean.dta", replace
restore

// Merge the two datasets created.
use "${path_work}/auxiliar/treated_index_mean.dta", clear
merge m:1 date using "${path_work}/auxiliar/treated_index_se_mean.dta"
drop _merge
merge m:1 date using "${path_work}/auxiliar/control_index_mean.dta"
drop _merge
merge m:1 date using "${path_work}/auxiliar/control_index_se_mean.dta"
drop _merge

// Delete the temporary files.
erase "${path_work}/auxiliar/treated_index_mean.dta"
erase "${path_work}/auxiliar/treated_index_se_mean.dta"
erase "${path_work}/auxiliar/control_index_mean.dta"
erase "${path_work}/auxiliar/control_index_se_mean.dta"

// Keep data only for 2023 afterwards.
keep if date >= td(01/01/2023)
keep if date <= td(30/01/2024)

// Compute the standard error limits
gen lower_limit_treated = mean_treated - 2*se_treated
gen upper_limit_treated = mean_treated + 2*se_treated
gen lower_limit_control = mean_control - 2*se_control
gen upper_limit_control = mean_control + 2*se_control

// Graph of the evolution of the daily price indices.
twoway ///
    (tsline mean_treated, lcolor("dkorange") lpattern(solid) lwidth(medthin)) ///
    (tsline mean_control, lcolor("dknavy") lpattern(solid) lwidth(medthin)) ///
    (rarea  lower_limit_treated upper_limit_treated date, color("dkorange%25") lwidth(none none none none)) ///
    (rarea  lower_limit_control upper_limit_control date, color("dknavy%25") lwidth(none none none none)), ///
    ytitle("Index (23/03/2023 = 100)", size(medsmall)) yscale(lwidth(none) line) xscale(lcolor(black) lwidth(none) line) ///
    tline(24/03/2023 27/10/2023, lcolor(gray) lpattern(dash) lwidth(vthin)) ylabel(, labsize(medsmall)) ///
    tline(18/04/2023 04/01/2024, lcolor(black) lpattern(dash) lwidth(vthin)) ylabel(, labsize(medsmall)) ///
    ttitle("") tlabel(01jan2023 "Jan-2023" 01feb2023 "Feb-2023"  01mar2023 "Mar-2023"  01apr2023 "Apr-2023"  01may2023 "May-2023" ///
    01jun2023 "Jun-2023" 01jul2023 "Jul-2023" 01Aug2023 "Aug-2023" 01Sep2023 "Sep-2023" 01oct2023 "Oct-2023" ///
    01nov2023 "Nov-2023" 01dec2023 "Dec-2023" 01jan2024 "Jan-2024" 01feb2024 "Feb-2024", labsize(medsmall) angle(forty_five) format(%tdMon-CCYY)) ///
    legend(order(1 "VAT Cut Food Items (treated)" 2 "Remaining Food Items (control)") rows(1) pos(6) region(lcolor(black)) size(medsmall)) plotregion(lcolor(black)) ///
    yline(100, lwidth(vthin) lcolor(black) lpattern(solid)) yline(94.34, lwidth(vthin) lcolor(gs13) lpattern(solid))

// Export the graph.
graph export "${results_figures}/figure_1.png", as(png) replace width(2400) height(1372)

*********************************************************


* FIGURE 2: Frequency of Positive and Negative Price Changes
*********************************************************

// Load the cleaned dataset.
use "${path_work}/auxiliar/imputation_0_only_food.dta", clear

gen treatment_reverse = 1 if treatment == 0
replace treatment_reverse = 0 if treatment == 1

// Collapse the data to get the last observation for each product in each week, considering 'treatment'.
collapse (last) price, by(id_sm week_year treatment_reverse)

// Set the data as a panel dataset using 'id_sm' as the panel variable and 'week_year' as the time variable.
xtset id_sm week_year, weekly

// Generate a variable 'relative_fi' representing the ratio of the current price to the lagged price.
bys id_sm: gen relative_fi = price / L.price

// Drop observations where 'relative_fi' is missing.
drop if relative_fi ==.

// Generate binary indicators for price changes.
bys id_sm: gen no_change   = 1 if relative_fi == 1
bys id_sm: gen change_up   = 1 if relative_fi > 1
bys id_sm: gen change_down = 1 if relative_fi < 1

// Replace missing values in binary indicators with 0.
replace no_change   = 0   if no_change ==.
replace change_up   = 0   if change_up ==.
replace change_down = 0   if change_down ==.

// Convert binary indicators to percentage scale.
replace no_change   = no_change * 100
replace change_up   = change_up * 100
replace change_down = change_down * 100

// Collapse the data to get the mean of each price change category for each treatment group in each week.
collapse (mean) no_change (mean) change_up (mean) change_down, by(treatment_reverse week_year)

// Generate a variable 'change' representing the sum of 'change_up' and 'change_down'.
gen change = change_up + change_down

// Set the data as a panel dataset using 'treatment' as the panel variable and 'week_year' as the time variable.
xtset treatment week_year, weekly

// Define and apply labels to the 'treatment' variable.
label define lbl_treatment 1 "Remaining Food Items (control)" 0 "VAT Cut Food Items (treated)", replace
label values treatment_reverse lbl_treatment

// Keep only observations from the year 2023 and onwards.
keep if week_year >= tw(2023w1)
keep if week_year <= tw(2024w4)

// Graph the frequency of price changes.
xtline  change change_down, recast(bar) note("") fcolor("dkorange" "dknavy") ///
	lcolor("dkorange" "dknavy") lwidth(none none none none) ///
	ytitle("Percentage of items", size(medsmall)) ylabel(0(10)100) ///
	ttitle("") ///
	tlabel(2023w1  "Jan-2023"  2023w5  "Feb-2023"  2023w9 "Mar-2023" ///
               2023w13 "Apr-2023" 2023w18 "May-2023" 2023w22 "Jun-2023" 2023w26 "Jul-2023" 2023w30 "Aug-2023" ///
               2023w34 "Sep-2023" 2023w39 "Oct-2023" 2023w43 "Nov-2023" 2023w47 "Dec-2023" ///
	       2023w52  "Jan-2024" 2024w4 "Feb-2024", labsize(medsmall) angle(forty_five) format(%twMon_YY)) ///
	legend(order(1 "Price increases" 2 "Price decreases") rows(1) region(lcolor(black)) size(medsmall)) ///
	yscale(lcolor(black) lwidth(none) line) ylabel(, labsize(medsmall)) ///
	tscale(lcolor(black) lwidth(none) line) ///
	tline(2023w16 2024w1, lcolor(gray) lpattern(dash) lwidth(medthin)) ///
	plotregion(lcolor(black)) byopts(legend(rows(1) pos(6) region(lcolor(black)) size(medsmall)) note("")) 

// Export the graph as a png file.
graph export "${results_figures}/figure_2.png", as(png) replace width(2400) height(1372)

*********************************************************


* FIGURE 3: Magnitude of Price Changes around the Temporary VAT Cut in Portugal
*********************************************************

// Load the cleaned dataset.
use "${path_work}/auxiliar/imputation_0_only_food.dta", clear

gen treatment_reverse = 1 if treatment == 0
replace treatment_reverse = 0 if treatment == 1

// Collapse the data to get the last observation for each product in each week, considering 'treatment'.
collapse (last) price, by(id_sm week_year treatment_reverse)

// Set the data as a panel dataset using 'id_sm' as the panel variable and 'week_year' as the time variable.
xtset id_sm week_year, weekly

// Generate a variable 'relative_fi' representing the ratio of the current price to the lagged price.
bys id_sm: gen relative_fi = (price / L.price -1)*100

// Drop observations where 'relative_fi' is missing.
drop if relative_fi ==.

// Define and apply labels to the 'treatment' variable.
label define lbl_treatment 1 "Remaining Food Items (control)" 0 "VAT Cut Food Items (treated)", replace
label values treatment_reverse lbl_treatment

histogram relative_fi if (relative_fi > -24 & relative_fi < 24) & week_year == tw(2023w16), ///
	width(2) percent fcolor(dknavy) lcolor(dknavy) binrescale ///
	ytitle("Percentage of items", size(medsmall)) yscale(noline) ylabel(#5) ///
	xtitle("Percentage change") xscale(noline) xlabel(-24(6)24) ///
	by(treatment_reverse, note("")) plotregion(lcolor(black))

// Export the graph as a png file.
graph export "${results_figures}/figure_3.png", as(png) replace width(2400) height(1372)

*********************************************************


* FIGURE A.4: Prices of Beans during the 2023 Temporary VAT Cut in Portugal
*********************************************************

local variable ${variable}

// Load the cleaned dataset.
use "${path_work}/auxiliar/imputation_0_only_food.dta", clear

keep if strpos(name, "feijao") & (strpos(name, "encarnado") | strpos(name, "vermelho") | strpos(name, "branco") | strpos(name, "frade") | strpos(name, "preto") | strpos(name, "manteiga") | strpos(name, "catarino") | strpos(name, "cacarola") | strpos(name, "mungo") | strpos(name, "azuki"))

drop if strpos(name, "salada") | strpos(name, "migas") | strpos(name, "hamburguer") | strpos(name, "atum") | strpos(name, "sopa") | strpos(name, "cornitas")

// Compute a daily average index with a simple average for each day for the Zero VAT goods.
preserve
keep if treatment == 1
keep id_sm id date name supermarket price treatment ecoicop5
sort date
tsfill
foreach v of varlist id date name supermarket price treatment ecoicop5 {
			
	// Replace missing values with the previous value if conditions are met.	
	by id_sm (date): replace `v' = `v'[_n-1] if missing(`v') & _n > 1 & !missing(`v'[_n-1]) & _n < _N

	}

// Compute a price index with base in the day before policy announcement.
local   base_date 23/03/2023
egen    base_price_23032023 = total(`variable' / (date == td(`base_date'))), by(id_sm)
gen     index_23032023 = (`variable' / base_price_23032023) * 100

collapse (median) mean_treated=index_23032023, by(date)
save "${path_work}/auxiliar/treated_index.dta", replace
restore

// Compute a daily average index with a simple average for each day for the non-Zero VAT goods.
preserve

keep if treatment == 0
sort date
keep id_sm id date name supermarket price treatment ecoicop5
tsfill
foreach v of varlist id date name supermarket price treatment ecoicop5 {
			
	// Replace missing values with the previous value if conditions are met.	
	by id_sm (date): replace `v' = `v'[_n-1] if missing(`v') & _n > 1 & !missing(`v'[_n-1]) & _n < _N
	}

// Compute a price index with base in the day before policy announcement.
local   base_date 23/03/2023
egen    base_price_23032023 = total(`variable' / (date == td(`base_date'))), by(id_sm)
gen     index_23032023 = (`variable' / base_price_23032023) * 100

collapse (median) mean_control=index_23032023, by(date)
save "${path_work}/auxiliar/control_index.dta", replace
restore

// Merge the two datasets created.
use "${path_work}/auxiliar/treated_index.dta", clear
merge m:1 date using "${path_work}/auxiliar/control_index.dta"
drop _merge

// Keep data only for 2023 afterwards.
keep if date >= td(03/01/2023)
keep if date <= td(30/01/2024)

// Graph of the evolution of the daily price indices.
twoway ///
    (tsline mean_treated, lcolor("dkorange") lpattern(solid) lwidth(medthin)) ///
    (tsline mean_control, lcolor("dknavy") lpattern(solid) lwidth(medthin)), ///
    ytitle("Index (23/03/2023 = 100)", size(medsmall)) ylabel(90(5)105) yscale(lwidth(none) line) xscale(lcolor(black) lwidth(none) line) ///
    tline(24/03/2023 27/10/2023, lcolor(gray) lpattern(dash) lwidth(vthin)) ylabel(, labsize(medsmall)) ///
    tline(18/04/2023 04/01/2024, lcolor(black) lpattern(dash) lwidth(vthin)) ylabel(, labsize(medsmall)) ///
    ttitle("") tlabel(#14, labsize(medsmall) angle(forty_five) format(%tdMon-CCYY)) ///
    legend(order(1 "Red Beans, Black-Eyed Peas and Chickpeas (treated)" 2 "Other Beans (control)") rows(1) pos(6) region(lcolor(black)) size(medsmall)) plotregion(lcolor(black)) ///
    yline(100, lwidth(vthin) lcolor(black) lpattern(solid)) yline(94.34, lwidth(vthin) lcolor(gs13) lpattern(solid))

// Export the graph.
graph export "${results_figures}/figure_a4.png", as(png) replace width(2400) height(1372)

// Delete the temporary files.
erase "${path_work}/auxiliar/treated_index.dta"
erase "${path_work}/auxiliar/control_index.dta"

*********************************************************


* TABLE B.3: Number of Items for Each Category
*********************************************************

// Load the cleaned dataset.
use "${path_work}/auxiliar/imputation_0_only_food.dta", clear

duplicates drop id_sm, force
 
// Open a log file to capture output
log using "${results_tables}/table_b3.txt", text replace
tab ecoicop4 treatment
tab ecoicop5 treatment
tab white_label treatment
tab domestic treatment
tab big treatment
tab high_price treatment
tab treatment
log close

*********************************************************
